source("../rscripts/merge_load_set4_Yanni_singlepulse.R")
source("../rscripts/package.R")
Plotting with stimulation signal:
Yanni_sp2 <- merge(Yanni_sp, StimYanni_sp, by = c("Condition", "RealTime"), allow.cartesian = T)
Yanni_sp2[, Condition := factor(Yanni_sp2$Condition, levels = levels(Yanni_sp$Condition))]
Yanni_sp2$Pulse_intensity.norm <- ifelse(Yanni_sp2$Pulse_intensity.norm > 3, 1.3, 0.95)
ggplot(Yanni_sp2, aes(x = RealTime)) + geom_line(aes(y=Ratio.norm, group=Label)) + geom_line(aes(y = Pulse_intensity.norm), col = "blue") + facet_wrap("Condition", ncol = 4) + stat_summary(aes(y=Ratio.norm), fun.y = mean, geom = "line", color = "red", size = 1.5) + theme(text = element_text(size = 25))
rm(Yanni_sp2)
Cut for times > 180min to have all conditions with same length.
Yanni_sp <- Yanni_sp[RealTime <= 180]
setkey(Yanni_sp, Condition, Label)
perform.PCA <- function(data, color = "Condition", na.fill = 1.2, value.var = "Ratio.norm", exp.var = c("Condition", "Label"), resp.var = "RealTime", plot.pca = F){
require(ggbiplot)
cast <- cast.and.fill(data, exp.var, resp.var, na.fill, value.var)
pca <- prcomp(cast$mat2, center = T, scale = T)
if(plot.pca) plot(pca)
g <- ggbiplot(pca, obs.scale = 1, var.scale = 1,
groups = unlist(cast$mat[, color]), ellipse = TRUE,
circle = TRUE, choices = c(1,2))
g <- g + scale_color_discrete(name = '')
g <- g + theme(legend.direction = 'horizontal',
legend.position = 'top')
return(g)
}
cast.and.fill <- function(data, exp.var = c("Condition", "Label"), resp.var = "RealTime", na.fill = 1.2, value.var = "Ratio.norm"){
formula <- as.formula(paste0(paste(exp.var, collapse = " + "), " ~ ", resp.var))
mat <- as.matrix(dcast(data, formula, value.var = value.var))
mat2 <- mat[,-(1:length(exp.var))]
class(mat2) <- "numeric"
mat2[which(is.na(mat2))] <- na.fill
return(list(mat = mat, mat2 = mat2))
}
perform.PCA(Yanni_sp[RealTime >= 40 & RealTime <= 75], plot.pca = T) + ggtitle("All pooled") + theme(text = element_text(size = 25))
Elbow method shows that considering only first 2 PC is fine.
EGF is taken appart along 2nd PC which is highly correlated with the very first time points. This is because EGF amplitude reaches a much larger value quicker than NGF or FGF. The 1st PC corresponds more to the “after-peak period”. Interestingly though it explains 50% of the variance, it cannot take appart the different conditions.
To understand it a bit better, try to color with different variables like length of pulse and concentrations:
Yanni_sp_pca <- copy(Yanni_sp)
Yanni_sp_pca[, c("gf","pulse_length","concentration") := list(str_extract(Condition, "[ENF]"), str_extract(Condition, "[0-9]+$"), str_extract(Condition, "[0-9]+(\\.[0-9]+)?"))]
perform.PCA(data = Yanni_sp_pca[RealTime >= 40 & RealTime <= 75], exp.var = c("Condition", "Label", "pulse_length", "concentration", "gf"), resp.var = "RealTime", color = "pulse_length") + ggtitle("All pooled - Color Pulse length") + theme(text = element_text(size = 25))
Lots of 10min on 2nd PC, but this isn’t very interesting since EGF and NGF are exclusively observed at 10min pulses.
perform.PCA(data = Yanni_sp_pca[RealTime >= 40 & RealTime <= 75], exp.var = c("Condition", "Label", "pulse_length", "concentration", "gf"), resp.var = "RealTime", color = "concentration") + ggtitle("All pooled - Color Concentration GF") + theme(text = element_text(size = 25))
1st PC is probably driven by “flat profiles” obtain for very low concentrations.
perform.PCA(data = Yanni_sp_pca[RealTime >= 40 & RealTime <= 75], exp.var = c("Condition", "Label", "pulse_length", "concentration", "gf"), resp.var = "RealTime", color = "gf") + ggtitle("All pooled - Color GF") + theme(text = element_text(size = 25))
Even with all conditions pooled together, we see that the 2nd PC very clearly take the GF appart.
perform.PCA(Yanni_sp[Condition %in% levels(Condition)[1:12] & RealTime >= 40 & RealTime <= 75], plot.pca = T) + ggtitle("E,N,F 10min pulse") + theme(text = element_text(size = 25))
Again we consider only 2 first PC.
1st PC hardly takes appart anything but N0.25-P10, so 1st PC is indeed probably driven by flat profiles. 2nd PC provides an interesting separation for the different GF.
perform.PCA(data = Yanni_sp_pca[Condition %in% levels(Condition)[1:12] & RealTime >= 40 & RealTime <= 75], exp.var = c("Condition", "Label", "pulse_length", "concentration", "gf"), resp.var = "RealTime", color = "concentration") + ggtitle("All pooled - Color concentration") + theme(text = element_text(size = 25))
perform.PCA(data = Yanni_sp_pca[Condition %in% levels(Condition)[1:12] & RealTime >= 40 & RealTime <= 75], exp.var = c("Condition", "Label", "pulse_length", "concentration", "gf"), resp.var = "RealTime", color = "gf") + ggtitle("All pooled - Color GF") + theme(text = element_text(size = 25))
NGF displays largest variability in these conditions, what is the opposite observation to the one in the sustained exposition dataset. EGF on the contrary is extremely compact, showing a consistent response whatever concentration with 10min pulse.
perform.PCA(Yanni_sp[Condition %in% levels(Condition)[1:4] & RealTime >= 40 & RealTime <= 75]) + ggtitle("EGF 10min pulse") + theme(text = element_text(size = 20))
Can’t really differentiate the concentrations
perform.PCA(Yanni_sp[Condition %in% levels(Condition)[5:8] & RealTime >= 40 & RealTime <= 75]) + ggtitle("NGF 10min pulse") + theme(text = element_text(size = 20))
NGF is extremely separable, notably at 0.25 ng/mL where the reponse is completely flat. Explained variance is very large with this dataset.
perform.PCA(Yanni_sp[Condition %in% levels(Condition)[9:12] & RealTime >= 40 & RealTime <= 75]) + ggtitle("FGF 10min pulse") + theme(text = element_text(size = 20))
sep.meas.along.time <- function(data1, data2, time.col, measure.col){
timev <- unique(data1[, get(time.col)])
if(!(identical(unique(data2[, get(time.col)]), timev))) stop("Time vectors must be identical between the two data")
out <- separability.measures(data1[get(time.col)==timev[1], get(measure.col)], data2[get(time.col)==timev[1], get(measure.col)])
for(t in timev[2:length(timev)]){
out <- rbind(out, separability.measures(data1[RealTime==t, get(measure.col)], data2[RealTime==t, get(measure.col)]))
}
out <- cbind(timev, out)
return(out)
}
library(plyr)
# Get all pairs of conditions
conditions_EGF <- combn(as.character(unique(Yanni_sp[,Condition])[1:4]), m = 2)
conditions_NGF <- combn(as.character(unique(Yanni_sp[,Condition])[5:8]), m = 2)
conditions_FGF <- combn(as.character(unique(Yanni_sp[,Condition])[9:12]), m = 2)
conditions <- cbind(conditions_EGF, conditions_NGF, conditions_FGF)
rm(conditions_EGF, conditions_FGF, conditions_NGF)
# Compute separabilities of conditions at each time point
sep.meas.raw <- apply(conditions, 2, function(x) sep.meas.along.time(Yanni_sp[Condition==x[1]], Yanni_sp[Condition==x[2]], "RealTime", "Ratio" ))
names(sep.meas.raw) <- apply(conditions, 2, function(x) paste(x[1], x[2], sep = ","))
# Go to data table
for(i in 1:length(sep.meas.raw)){
temp <- unlist(strsplit(names(sep.meas.raw)[i], ","))
sep.meas.raw[[i]]$Cond1 <- temp[1]
sep.meas.raw[[i]]$Cond2 <- temp[2]
}
sep.meas.raw <- as.data.table(rbind.fill(sep.meas.raw))
sep.meas.raw[, c("Cond1", "Cond2") := list(factor(Cond1, levels = levels(Yanni_sp$Condition)), factor(Cond2, levels = levels(Yanni_sp$Condition)))]
ggplot(sep.meas.raw, aes(x= timev, y = jm)) + geom_line() + facet_wrap(~Cond1 + Cond2, ncol = 6) + theme(text=element_text(size=25)) + ylab("Jeffries-Matusita [0, sqrt(2)]") + geom_vline(xintercept=40, colour="blue", linetype="longdash") + ggtitle("Separability along time - Raw value")
EGF is only separable at the time of the peak, long-term response similar.
NGF250 is the only condition which presents a sustained state on the long-run, and consequently presents long-term discrepancies with the other conditions which are adaptative. N0.25 can be used as a reference zero.
FGF conditions shows the weakness of the method, as it can’t tell any condition appart whereas there’s clearly a difference between those. This is because this measure only look at the range where the data are varying. FGF response is never frankly going off from “zero”. Normalization can provide a way to amplify these differences.
# Compute separabilities of conditions at each time point
sep.meas.norm <- apply(conditions, 2, function(x) sep.meas.along.time(Yanni_sp[Condition==x[1]], Yanni_sp[Condition==x[2]], "RealTime", "Ratio.norm" ))
names(sep.meas.norm) <- apply(conditions, 2, function(x) paste(x[1], x[2], sep = ","))
# Go to data table
for(i in 1:length(sep.meas.norm)){
temp <- unlist(strsplit(names(sep.meas.norm)[i], ","))
sep.meas.norm[[i]]$Cond1 <- temp[1]
sep.meas.norm[[i]]$Cond2 <- temp[2]
}
sep.meas.norm <- as.data.table(rbind.fill(sep.meas.norm))
sep.meas.norm[, c("Cond1", "Cond2") := list(factor(Cond1, levels = levels(Yanni_sp$Condition)), factor(Cond2, levels = levels(Yanni_sp$Condition)))]
ggplot(sep.meas.norm, aes(x= timev, y = jm)) + geom_line() + facet_wrap(~Cond1 + Cond2, ncol = 6) + theme(text=element_text(size=25)) + ylab("Jeffries-Matusita [0, sqrt(2)]") + geom_vline(xintercept=40, colour="blue", linetype="longdash") + ggtitle("Separability along time - Raw value")
As expected small differences for EGF and NGF get quite largely amplified.
source("../rscripts/single_peak_features.R")